# Preview the first five rows of the message DataFrame.
df.head(n=5)
| | sender_name | timestamp_ms | content | type | photos | gifs | audio_files | year | month | hour |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Mélanie Amazo'Night Bergeot | 2020-03-29 07:55:58.952 | :p | Generic | NaN | NaN | NaN | 2020 | 3 | 07:55 |
| 1 | Nicolas Cailleux | 2020-03-29 07:43:47.446 | t'as de quoi faire un peu quand meme :p | Generic | NaN | NaN | NaN | 2020 | 3 | 07:43 |
| 2 | Nicolas Cailleux | 2020-03-29 07:43:41.209 | exactement ça va | Generic | NaN | NaN | NaN | 2020 | 3 | 07:43 |
| 3 | Nicolas Cailleux | 2020-03-29 07:43:38.139 | enfin 59340 | Generic | NaN | NaN | NaN | 2020 | 3 | 07:43 |
| 4 | Nicolas Cailleux | 2020-03-29 07:42:49.732 | 60k msg | Generic | NaN | NaN | NaN | 2020 | 3 | 07:42 |
# Report the number of days between the first and last day, and both dates.
print(f"{nb_day_first_last!s} day between {first_day!s} and {last_day!s}")
1263 day between 2016-10-13 and 2020-03-29
# Count the "active" days, i.e. distinct calendar dates on which a message exists.
# NOTE: test_activeday is an alias of df (not a copy), so the 'date' column
# created below is added to df as well.
test_activeday = df
test_activeday["date"] = pd.to_datetime(test_activeday["timestamp_ms"]).dt.date
nb_active_days = test_activeday["date"].nunique()

# Share of active days over the whole period; int() truncates to a whole percent.
pc_active_days = int((nb_active_days * 100) / nb_day_first_last)
print(
    "%d (%d percent) of those were ‘active’ days (i.e. messages were sent)."
    % (nb_active_days, pc_active_days)
)
# TODO: see how to display this as a proper percentage
1084 (85 percent) of those were ‘active’ days (i.e. messages were sent).
# Report the total number of messages and how many more the author sent.
print(
    "%d messages. I sent %d more messages than my boyfriend"
    % (nb_msg_send, nb_diff_msg_send)
)
59389 messages. I sent 1277 more messages than my boyfriend
# Report the overall unique-word count, then the count for each participant.
print(
    "%d unique words were used (many of which are not real words). "
    "I used %d unique words where my boyfriend used %d."
    % (nb_unique_word, nb_unique_word_mel, nb_unique_word_nico)
)
6927 unique words were used (many of which are not real words). I used 4244 unique words where my boyfriend used 2683.
# Describe the participants of the conversation.
# NOTE(review): "boyfriends" looks like a typo for "boyfriend" (the other
# messages use the singular) - confirm before changing the printed text.
print(
    "2 participants - my boyfriends and I - "
    "living in FR most of the time and in our early twenties."
)
2 participants - my boyfriends and I - living in FR most of the time and in our early twenties.